# -*- coding: utf-8 -*-
# full demo
import requests
from bs4 import BeautifulSoup
import time


# Sentinel href the site puts on the "next" link of the last page.
none_page = r'javascript:void(0);'

base_url = r'https://news.nau.edu.cn'
url = r'https://news.nau.edu.cn/5797/list.htm'

# Scrape up to 10 pages of the news list, writing "date: title link" lines.
# `with` guarantees the file is closed even if a request raises mid-loop.
with open('news.txt', 'w', encoding='utf-8') as f:
    for _ in range(10):  # hard cap on pages to avoid crawling forever
        req = requests.get(url, timeout=10)  # timeout: never hang indefinitely
        req.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error page
        # The server omits/mislabels the charset so requests guesses ISO-8859-1;
        # decode the raw bytes as UTF-8 directly instead of the lossy
        # text -> encode('ISO-8859-1') -> decode('utf-8') round-trip.
        soup = BeautifulSoup(req.content.decode('utf-8'), features='lxml')
        for col in soup.select('.cols'):
            title = col.a['title']
            link = base_url + col.a['href']
            date = col.select_one('.cols_meta').get_text()
            print(f'{date}: {title} {link}')
            f.write(f'{date}: {title} {link}\n')
        # Guard against a missing ".next" element (last-page markup may differ)
        # as well as the explicit javascript:void(0) sentinel.
        next_links = soup.select('.next')
        if not next_links or next_links[0]['href'] == none_page:
            break
        next_page = next_links[0]['href']
        print(f'next page: {base_url + next_page}')
        url = base_url + next_page
        time.sleep(2)  # be polite: throttle requests to the server




# Example: resolving a relative href against a page URL.
# NOTE(review): os.path.dirname() is the wrong tool for URLs — with an href
# that starts with '/', this yields 'https://example.com/b/folder/big/a.jpg'
# instead of 'https://example.com/folder/big/a.jpg'. Prefer
# urllib.parse.urljoin(current_url, href), which handles absolute paths,
# query strings, and '../' segments correctly.
#
# import os
#
#
# current_url = 'https://example.com/b/c.html?a=1&b=2'
# href = '/folder/big/a.jpg'
# absolute_url = os.path.dirname(current_url) + href
# print(absolute_url)
